# Load the four source tables from the project S3 bucket.
# The income, life-expectancy and population tables are reshaped from wide to
# long further down; the country table is joined on its `name` column.
income <- read.csv(
  file = "https://projectdat.s3.amazonaws.com/income_per_person.csv"
)
life <- read.csv(
  file = "https://projectdat.s3.amazonaws.com/life_expectancy_years.csv"
)
population <- read.csv(
  file = "https://projectdat.s3.amazonaws.com/population_total.csv"
)
country <- read.csv(
  file = "https://projectdat.s3.amazonaws.com/countries_total.csv"
)
library(dplyr)
library(tidyr)
library(ggplot2)
library(plotly)
library(gganimate)
# Reshape income per person from wide to long so the result has three
# columns: country, Year and Income.
# Every column except `geo` is stacked into Year/Income pairs, rows with a
# missing income are dropped, and `geo` is then renamed to `country`.
Income.Per.Person <- income %>%
  gather(key = "Year", value = "Income", -geo, na.rm = TRUE) %>%
  rename(country = geo)
head(Income.Per.Person)
##               country  Year Income
## 1         Afghanistan X1800    603
## 2             Albania X1800    667
## 3             Algeria X1800    715
## 4             Andorra X1800   1200
## 5              Angola X1800    618
## 6 Antigua and Barbuda X1800    757
# Reshape life expectancy the same way: one row per country and year, with
# columns country, Year and Life.Expectancy.
# Rows with a missing life expectancy are dropped, which is why the row
# labels in the printed output below skip a number.
Life.Expectancy.in.Years <- life %>%
  gather(key = "Year", value = "Life.Expectancy", -geo, na.rm = TRUE) %>%
  rename(country = geo)
head(Life.Expectancy.in.Years)
##               country  Year Life.Expectancy
## 1         Afghanistan X1800            28.2
## 2             Albania X1800            35.4
## 3             Algeria X1800            28.8
## 5              Angola X1800            27.0
## 6 Antigua and Barbuda X1800            33.5
## 7           Argentina X1800            33.2
# Join the two long tables on country and year. Only country/year pairs
# present in both tables are kept; the result has the columns country, Year,
# Income and Life.Expectancy.
LifeExpIncom <- merge(
  x = Income.Per.Person,
  y = Life.Expectancy.in.Years,
  by = c("country", "Year")
)

# Attach the country-level information (e.g. region) by matching `country`
# against the `name` column of the country table. `all.x = TRUE` keeps every
# row of LifeExpIncom even when no matching country record exists.
LifeExpIncomFinal <- merge(
  x = LifeExpIncom,
  y = country,
  by.x = "country",
  by.y = "name",
  all.x = TRUE
)

# Reshape population to long form (country, Year, Population), dropping
# missing values, then join it onto the income / life-expectancy / region
# table so the final data set carries all four pieces of information.
Pop <- population %>%
  gather(key = "Year", value = "Population", -geo, na.rm = TRUE) %>%
  rename(country = geo)
head(Pop)
##               country  Year Population
## 1         Afghanistan X1800    3280000
## 2             Albania X1800     410000
## 3             Algeria X1800    2500000
## 4             Andorra X1800       2650
## 5              Angola X1800    1570000
## 6 Antigua and Barbuda X1800      37000
# Only country/year pairs that also have a population value are kept.
LEIP <- merge(
  x = LifeExpIncomFinal,
  y = Pop,
  by = c("country", "Year")
)
# Interactive scatter plot of life expectancy against income for 2015.
# [required]
## Point size is proportional to population size.
## Each country gets its own colour.
## alpha = 0.6 keeps overlapping points visible.
## A royal-blue stroke outlines each point so partially overlapping points
## can be told apart.
## The hover text shows the country name and its population in millions.

# Year labels still carry the "X" prefix that read.csv() added to the
# numeric column names, hence the comparison against "X2015".
LEIP.2015 <- LEIP %>%
  filter(Year == "X2015") %>%
  mutate(pmm = Population / 1000000) %>%
  select(country, Year, Life.Expectancy, Income, Population, pmm)

plot_ly(
  data = LEIP.2015,
  x = ~Income,
  y = ~Life.Expectancy,
  color = ~country,
  stroke = I("royalblue"),
  hoverinfo = "text",
  # A formula is evaluated against `data`, so the hover text always stays
  # aligned with the rows being plotted.
  hovertext = ~paste("Country: ", country,
                     "<br> Population: ", pmm, "Million"),
  alpha = 0.6,
  size = ~Population,
  sizes = c(10, 5000),
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    # `title = list(text, font)` replaces the deprecated `titlefont`.
    title = list(
      text = "Association Between Life Expectancy & Income in 2015",
      font = list(
        family = "Helvetica",
        size = 25,
        color = "Royalblue"
      )
    ),
    # plotly expects `margin` to be a list of sides; a bare number is not a
    # valid value. The top margin is larger to leave room for the title.
    margin = list(l = 10, r = 10, b = 10, t = 60),
    plot_bgcolor = "#e5ecf6",
    xaxis = list(title = "Income (US$)"),
    yaxis = list(title = "Life Expectancy (Yr)"),
    showlegend = FALSE
  )
# Animated scatter plot showing how the relationship between life expectancy
# and income changes over the years. [required]
## Point size is proportional to population size.
## Fill colour distinguishes regions.
## alpha = 0.3 keeps overlapping points visible.
## A black outline (shape 21) separates partially overlapping points.

# Strip the leading "X" from the year label. The year is stored as an integer
# so that the animated title shows whole years rather than interpolated
# fractional values.
LEIP2 <- mutate(LEIP, year.num = as.integer(substring(Year, 2)))

p <- ggplot(
  LEIP2,
  aes(x = Income, y = Life.Expectancy, fill = region)
) +
  geom_point(
    aes(size = Population),
    color = "black",
    shape = 21,
    show.legend = FALSE,
    alpha = 0.3
  ) +
  scale_size_continuous(range = c(2, 25)) +
  scale_x_log10() +
  labs(
    title = "Year: {frame_time}",
    x = "Income (US$)",
    y = "Life Expectancy (Yr)"
  ) +
  transition_time(year.num) +
  ease_aes("linear")

# Render once and reuse the result. Passing the unrendered plot to
# anim_save() would render it a second time with default settings, so the
# saved GIF would not match the animation displayed below.
anim <- animate(p, renderer = gifski_renderer(), rewind = TRUE)
anim_save("LifeExp.gif", animation = anim)
anim

# Interactive map of the gas station data, with some information about each
# station shown on hover.
## A random sample of (up to) 500 gas stations from the US is plotted.
## Hover text: state, county, address and zip code.
GS <- read.csv("https://projectdat.s3.amazonaws.com/POC.csv")

# Guard against a file with fewer than 500 rows; sample() would otherwise
# fail when asked for more rows than exist.
n_sample <- min(500, nrow(GS))
GS.500 <- GS[sample(nrow(GS), n_sample), ]

# geo styling
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showland = TRUE,
  landcolor = toRGB("gray95"),
  subunitcolor = toRGB("gray85"),
  countrycolor = toRGB("gray85"),
  countrywidth = 0.5,
  subunitwidth = 0.5
)

fig <- plot_geo(GS.500, lat = ~ycoord, lon = ~xcoord) %>%
  add_markers(
    text = ~paste(STATE, county, ADDRESS, ZIPnew, sep = "<br>"),
    # I() makes plotly use the value literally. Without it the string is
    # treated as a data value and mapped through the default palette, so the
    # markers would not actually be orange.
    color = I("orange"),
    symbol = I("circle"),
    hoverinfo = "text"
  ) %>%
  layout(
    title = paste(n_sample, "Randomly Selected Gas Stations in the U.S."),
    geo = g
  )

fig